from selenium import webdriver
from lxml import etree
import re

# Scrape the board at `url` page by page: for every <dd> entry in the
# "board-wrapper" list, build a dict with name, url, score, time and actor
# and print it, then follow the "下一页" (next page) link until it is gone.

# Strips newlines and spaces from the raw text of the "star" paragraph.
pattern_1 = re.compile(r'\n| ')
driver = webdriver.Chrome()
url = 'http://maoyan.com/board/4'
# XPath of the "next page" link; shared by the lxml check and the click.
next_page_xpath = '//a[text()="下一页"]'

# try/finally guarantees the browser is shut down even when parsing or
# navigation raises (e.g. IndexError from an xpath that matched nothing).
try:
    driver.get(url)
    next_page_flag = True
    while next_page_flag:
        # Parse a snapshot of the currently rendered page with lxml.
        element = etree.HTML(driver.page_source)
        for dd in element.xpath('//dl[@class="board-wrapper"]/dd'):
            item = {
                'name': dd.xpath('./a/@title')[0],
                'url': 'http://maoyan.com' + dd.xpath('./a/@href')[0],
                # The score is split across several text nodes; join them.
                'score': ''.join(dd.xpath('.//p[@class="score"]//text()')),
                'time': dd.xpath('.//p[@class="releasetime"]/text()')[0],
                'actor': pattern_1.sub(
                    '', dd.xpath('.//p[@class="star"]/text()')[0]
                ),
            }
            print(item)
        # Check whether a next-page link exists in the parsed snapshot.
        if element.xpath(next_page_xpath):
            # find_element('xpath', ...) is used instead of
            # find_element_by_xpath, which was removed in Selenium 4.3;
            # the generic form works on both Selenium 3 and 4.
            driver.find_element('xpath', next_page_xpath).click()
        else:
            next_page_flag = False
finally:
    driver.quit()